*---------------------------------------------------
* --- Session setup -------------------------------------------------------
* Start from a clean state. The "capture" prefix suppresses the error a
* command would otherwise raise (e.g. when no log is currently open).
capture clear mata
capture clear
capture log close
program drop _all
macro drop _all

* Interpret the rest of the file under Stata 10 syntax rules.
version 10.0

* Memory and matrix-size limits, and no --more-- pauses in the output.
set memory 700m
set matsize 2000
set more off

* All file names below are relative to this directory.
cd "E:\REStat_MS14767_Vol96(2)\Data preparation Compustat"
log using "3_compustatvars.log", replace
*---------------------------------------------------------------


********************************************************************************
********                                                	      **************
********  in this file we create a sample of compustat variables  **************
********                compustatvars.dta                  		**************
********************************************************************************


* Load the Compustat master file and keep only the variables used below.
use "compustat_master_america.dta", clear
sort ticker year
* forn_sales is written out in full (the original listed the abbreviation
* "forn_sal"): the later commands refer to forn_sales by its full name, and
* an abbreviated name stops resolving when "set varabbrev off" is in effect.
keep ticker vantagekey name countryinc employees sales forn_sales ta year SICALL2-SICALL15 SIC4 CUSIP rdexpense mkvalq
rename CUSIP cusip

* Two-digit industry code: SIC4 divided by 100, truncated to an integer.
generate SIC2 = int(SIC4/100)
label variable SIC2 "SIC4/100"



*****           WE KEEP ONLY AMERICAN FIRMS          *****
*****             FOR WHICH WE HAVE SALES            *****


* Diagnostic only: report how many observations have countryinc != 0.
* Nothing is dropped here.
* NOTE(review): presumably countryinc == 0 codes US incorporation and the
* input file is already restricted to American firms -- confirm.
count if countryinc != 0

* Domestic sales (sales_new) are total sales minus foreign sales
* (forn_sales); a missing forn_sales is treated as zero.
replace forn_sales = 0 if forn_sales == .
generate sales_new = sales - forn_sales

* Negative domestic sales are set to missing, then every observation
* without a usable sales_new is removed. The counts are written to the log.
count if sales_new < 0
replace sales_new = . if sales_new < 0
count if sales_new == .
drop if sales_new == .
label variable sales_new "Sales - foreign sales"

* Shrink storage types, then discard the inputs that are no longer needed.
compress
drop countryinc vantagekey sales forn_sales


********************* we correct some variables  ****************
********************* and generate some others     ****************

* Numeric firm identifier: one integer per distinct ticker.
sort ticker year
egen firmnum = group(ticker)
label variable firmnum "group(ticker)"

* Panel sanity check: each firm should appear at most once per year, so if
* the panel is correct no observation is dropped here.
* The data are sorted explicitly first because egen may leave them in a
* different order; a check based on the previous row ([_n-1]) would then
* miss duplicates that are not adjacent. Within each firm-year group the
* first observation is kept and any further ones are dropped.
sort firmnum year
by firmnum year: drop if _n > 1

* Number of firm observations per SIC4 industry and year, computed after
* the duplicate check so that duplicated firm-years are not counted twice.
egen ncomp = count(firmnum), by(SIC4 year)
label variable ncomp "number of competitors per year/SIC4"


********************************************************
*****           
*****    here we define market shares (our dep. var.) defining the market as the SIC4 industry
*****           
********************************************************

* Market share at the SIC4 level: firm domestic sales divided by the
* total domestic sales of its SIC4 industry in the same year.
egen tot_sales = total(sales_new), by(SIC4 year)
label variable tot_sales "sum of sales by SIC4, year"
generate MS = sales_new / tot_sales
label variable MS "Market shares firm/year at the SIC4 level"

* Same construction with the broader SIC2 industry as the market.
egen tot_sales2 = total(sales_new), by(SIC2 year)
label variable tot_sales2 "sum of sales by SIC2, year"
generate MS2 = sales_new / tot_sales2
label variable MS2 "Market shares firm/year at the SIC2 level"


* Herfindahl-Hirschman index: sum of squared SIC4 market shares within
* each SIC4 industry and year. ms_sq is a scratch variable.
generate ms_sq = MS * MS
egen HHI = total(ms_sq), by(SIC4 year)
label variable HHI "HHI index at the SIC4 level"
drop ms_sq

* Restore the original upper-case name of the CUSIP identifier and keep
* only the identifiers, the SIC codes and the constructed measures.
rename cusip CUSIP
keep  ticker  year CUSIP SICALL2 SICALL3 SICALL4 SICALL5 SICALL6 SICALL7 SICALL8 SICALL9 SICALL10 SICALL11 SICALL12 SICALL13 SICALL14 SICALL15 SIC4 MS MS2 HHI

* Save the firm-year file sorted by ticker and year.
sort ticker year
save "compustatvars.dta", replace

* Close the log file opened by "log using" at the top of this do-file so
* that it is not left open when the script finishes.
log close
